home *** CD-ROM | disk | FTP | other *** search
- ;;; JACAL: Symbolic Mathematics System. -*-scheme-*-
- ;;; Copyright 1989, 1990, 1991, 1992 Aubrey Jaffer.
- ;;; See the file "COPYING" for terms applying to this program.
-
- ;;; This implements a lexer which separates tokens according to
- ;;; character class and a Pratt style parser.
- ;;; (CGOL:TOP-PARSE delimiter) returns one parsed object. delimiter
- ;;; must be a character or string.
-
- ;;; References are:
-
- ;;; Pratt, V. R.
- ;;; Top Down Operator Precendence.
- ;;; SIGACT/SIGPLAN
- ;;; Symposium on Principles of Programming Languages,
- ;;; Boston, 1973, 41-51
-
- ;;; WORKING PAPER 121
- ;;; CGOL - an Alternative External Representation For LISP users
- ;;; Vaughan R. Pratt
- ;;; MIT Artificial Intelligence Lab.
- ;;; March 1976
-
- ;;; Mathlab Group,
- ;;; MACSYMA Reference Manual, Version Ten,
- ;;; Laboratory for Computer Science, MIT, 1983
-
- (define *syn-rules* #f)
- (define *syn-defs* #f)
- (define *lex-rules* #f)
- (define *lex-defs* #f)
-
- (define lex:column 0)
- (define lex:peek-char peek-char)
- (define (lex:read-char)
- (let ((c (read-char)))
- (if (or (eqv? c #\newline) (eof-object? c))
- (set! lex:column 0)
- (set! lex:column (+ 1 lex:column)))
- c))
- (define (lex:bump-column pos)
- (cond ((eqv? #\newline (lex:peek-char))
- (lex:read-char))) ;to do newline
- (set! lex:column (+ lex:column pos)))
- (define (cgol:warn msg)
- (do ((j (+ -1 lex:column) (- j 8)))
- ((> 8 j)
- (do ((i j (- i 1)))
- ((>= 0 i))
- (display-diag #\ )))
- (display-diag slib:tab))
- (display-diag "^ ")
- (display-diag msg)
- (newline-diag))
-
- ;(require 'record)
- ;(define lex-rtd (make-record-type "lexrec" '(cc sfp)))
- ;(define lex:make-rec (record-constructor lex-rtd))
- ;(define lex:cc (record-accessor lex-rtd 'cc))
- ;(define lex:sfp (record-accessor lex-rtd 'sfp))
-
- (define lex:make-rec cons)
- (define lex:cc car)
- (define lex:sfp cdr)
-
- (define lex:tab-get (hash-inquirer char=?))
- (define lex:tab-set! (hash-associator char=?))
- (define (lex:def-class bp chrlst string-fun)
- (for-each
- (lambda (token)
- (let ((oldlexrec (lex:tab-get *lex-defs* token)))
- (lex:tab-set! *lex-defs* token (lex:make-rec bp string-fun))
- (cond ((or (not oldlexrec) (eqv? (lex:cc oldlexrec) bp)) #t)
- (else (math:warn "cc of " token " redefined to " bp)))))
- chrlst))
-
- ;;; CGOL:SXOP-LBP is the left binding power of this sxop.
- ;;; CGOL:SXOP-RBP is the right binding power of this sxop.
- ;;; CGOL:SXOP-LED is the left denotation (function to call when
- ;;; unclaimed token on left).
- ;;; CGOL:SXOP-NUD is the null denotation (function to call when no
- ;;; unclaimed tokens).
-
- ;(define sxop-rtd
- ; (make-record-type "sxop" '(name lame lbp rbp nud led)))
- ;(define cgol:make-sxop (record-constructor sxop-rtd))
- ;(define cgol:sxop-name (record-accessor sxop-rtd 'name))
- ;(define cgol:sxop-lame (record-accessor sxop-rtd 'lame))
- ;(define cgol:sxop-lbp (record-accessor sxop-rtd 'lbp))
- ;(define cgol:sxop-led (record-accessor sxop-rtd 'led))
- ;(define cgol:sxop-rbp (record-accessor sxop-rtd 'rbp))
- ;(define cgol:sxop-nud (record-accessor sxop-rtd 'nud))
- ;;sxop-match overloaded on sxop-rbp
- ;(define cgol:sxop-match cgol:sxop-rbp)
-
- ;(define cgol:sxop-set-name! (record-modifier sxop-rtd 'name))
- ;(define cgol:sxop-set-lame! (record-modifier sxop-rtd 'lame))
- ;(define cgol:sxop-set-lbp! (record-modifier sxop-rtd 'lbp))
- ;(define cgol:sxop-set-led! (record-modifier sxop-rtd 'led))
- ;(define cgol:sxop-set-rbp! (record-modifier sxop-rtd 'rbp))
- ;(define cgol:sxop-set-nud! (record-modifier sxop-rtd 'nud))
- ;;sxop-match overloaded on sxop-rbp
- ;(define cgol:sxop-set-match! cgol:sxop-set-rbp!)
-
- (define (cgol:make-sxop name lame lbp rbp nud led)
- (cons (cons name lame) (cons (cons lbp rbp) (cons nud led))))
- (define cgol:sxop-name caar)
- (define cgol:sxop-lame cdar)
- (define cgol:sxop-lbp caadr)
- (define cgol:sxop-rbp cdadr)
- (define cgol:sxop-nud caddr)
- (define cgol:sxop-led cdddr)
- ;;sxop-match overloaded on sxop-rbp
- (define cgol:sxop-match cgol:sxop-rbp)
-
- (define (cgol:sxop-set-name! pob val) (set-car! (car pob) val))
- (define (cgol:sxop-set-lame! pob val) (set-cdr! (car pob) val))
- (define (cgol:sxop-set-lbp! pob val) (set-car! (cadr pob) val))
- (define (cgol:sxop-set-rbp! pob val) (set-cdr! (cadr pob) val))
- (define (cgol:sxop-set-nud! pob val) (set-car! (cddr pob) val))
- (define (cgol:sxop-set-led! pob val) (set-cdr! (cddr pob) val))
- ;;sxop-match overloaded on sxop-rbp
- (define cgol:sxop-set-match! cgol:sxop-set-rbp!)
-
- (define cgol:sxop-get (hash-inquirer equal?))
- (define cgol:sxop-set! (hash-associator equal?))
-
- ;(define cgol:null-sxop #f)
-
- (define (cgol:defield tokens value cap accessor modifier)
- (for-each
- (lambda (tok)
- (let* ((token (if (symbol? tok) (symbol->string tok) tok))
- (a (cgol:sxop-get *syn-defs* token)))
- (cond ((not a)
- (set! a (cgol:make-sxop #f #f #f #f #f #f))
- ; (if (equal? "" tok) (set! cgol:null-sxop a))
- (cgol:sxop-set! *syn-defs* token a)))
- (cond ((eqv? value (accessor a)))
- ((not (accessor a)) (modifier a value))
- (else (math:warn cap " of " token
- " redefined from " (accessor a)
- " to " value)
- (modifier a value)))))
- (if (pair? tokens)
- tokens
- (list tokens))))
-
- (define (cgol:defname tokens value)
- (cgol:defield tokens value "name" cgol:sxop-name cgol:sxop-set-name!))
- (define (cgol:deflame tokens value)
- (cgol:defield tokens value "lame" cgol:sxop-lame cgol:sxop-set-lame!))
- (define (cgol:deflbp tokens value)
- (cgol:defield tokens value "lbp" cgol:sxop-lbp cgol:sxop-set-lbp!))
- (define (cgol:defled tokens value)
- (cgol:defield tokens value "led" cgol:sxop-led cgol:sxop-set-led!))
- (define (cgol:defrbp tokens value)
- (cgol:defield tokens value "rbp" cgol:sxop-rbp cgol:sxop-set-rbp!))
- ;;sxop-match overloaded on sxop-rbp
- (define (cgol:defmatch tokens value)
- (cgol:defield tokens value "match" cgol:sxop-rbp cgol:sxop-set-rbp!))
- (define (cgol:defnud tokens value)
- (cgol:defield tokens value "nud" cgol:sxop-nud cgol:sxop-set-nud!))
-
- ;;;Calls to set up tables.
-
- (define (cgol:delim x lbp)
- (cgol:deflbp x lbp)
- (cgol:defrbp x -2)
- (cgol:defled x #f)
- (cgol:defnud x #f))
- (define (cgol:separator x lbp)
- (cgol:deflbp x lbp)
- (cgol:defrbp x -1)
- (cgol:defled x #f)
- (cgol:defnud x #f))
- (define (cgol:prefix op sop rbp)
- (cgol:defname op sop)
- (cgol:defrbp op rbp)
- (cgol:defnud op cgol:parse-prefix))
- (define (cgol:postfix op sop lbp)
- (cgol:deflame op sop)
- (cgol:deflbp op lbp)
- (cgol:defled op cgol:parse-postfix))
- (define (cgol:infix op sop lbp rbp)
- (cgol:deflame op sop)
- (cgol:deflbp op lbp)
- (cgol:defrbp op rbp)
- (cgol:defled op cgol:parse-infix))
- (define (cgol:nary op sop bp)
- (cgol:deflame op sop)
- (cgol:deflbp op bp)
- (cgol:defrbp op bp)
- (cgol:defled op cgol:parse-nary))
- (define (cgol:nofix op sop)
- (cgol:defname op sop)
- (cgol:defnud op cgol:parse-nofix))
- (define (cgol:commentfix op sop)
- (cgol:defname op sop)
- (cgol:deflame op sop)
- (cgol:deflbp op 220)
- (cgol:defrbp op 220)
- (cgol:defnud op cgol:parse-precomment)
- (cgol:defled op cgol:parse-postcomment))
- (define (cgol:rest op sop bp)
- (cgol:defname op sop)
- (cgol:defnud op cgol:parse-rest)
- (cgol:defrbp op bp))
- (define (cgol:matchfix op sop match)
- (cgol:defname op sop)
- (cgol:delim match 0)
- (cgol:defmatch op match)
- (cgol:defnud op cgol:parse-matchfix))
- (define (cgol:inmatchfix op sop match lbp)
- (cgol:deflame op sop)
- (cgol:defmatch op match)
- (cgol:delim match 0)
- (cgol:deflbp op lbp)
- (cgol:defled op cgol:parse-inmatchfix))
-
- ;;;; Here is the code which actually lexes and parses.
-
- (define cgol:char0 (integer->char 0))
- (define (lex:tab-geteof x)
- (lex:tab-get *lex-rules* (if (eof-object? x) cgol:char0 x)))
- (define (lex)
- (let* ((char (lex:read-char))
- (rec (lex:tab-geteof char))
- (proc (and rec (lex:cc rec)))
- (clist (list char)))
- (cond
- ((not proc) char)
- ((procedure? proc)
- (do ((cl clist (begin (set-cdr! cl (list (lex:read-char))) (cdr cl))))
- ((proc (lex:peek-char))
- (funcall (lex:sfp rec) clist))))
- ((eqv? 0 proc) (lex))
- (else
- (do ((cl clist (begin (set-cdr! cl (list (lex:read-char))) (cdr cl))))
- ((not (let* ((prec (lex:tab-geteof (lex:peek-char)))
- (cclass (and prec (lex:cc prec))))
- (or (eqv? cclass proc)
- (eqv? cclass (- proc 1)))))
- (if (lex:sfp rec)
- (funcall (lex:sfp rec) clist)
- (list->string clist))))))))
-
- ;;; Now for the way we use LEX.
- (define cgol:token #f)
- (define cgol:pob #f)
- (define (cgol:advance)
- (set! cgol:token (lex))
- (set! cgol:pob (cgol:sxop-get *syn-rules* cgol:token))
- cgol:token)
-
- ;;; Now actual parsing.
- (define (cgol:nudcall)
- (let* ((obj cgol:token) (pob cgol:pob))
- (cond
- ((cgol:at-sep?) (cgol:warn 'extra-separator)
- (cgol:advance)
- (cgol:nudcall))
- (pob (let ((proc (cgol:sxop-nud pob)))
- (cond (proc (proc pob))
- (else (cgol:advance)
- (let ((name (cgol:sxop-name pob)))
- (or (and (not (procedure? name)) name)
- (cgol:sxop-lame pob)
- '?))))))
- (else (cgol:advance)
- (if (string? obj) (string->symbol obj) obj)))))
- (define (cgol:ledcall left)
- (let* ((pob cgol:pob))
- (cond
- ((cgol:at-sep?) (cgol:warn 'extra-separator)
- (cgol:advance)
- (cgol:ledcall left))
- (pob (let ((proc (cgol:sxop-led pob)))
- (cond (proc (proc pob left))
- (else (cgol:warn 'not-an-operator)
- (cgol:advance)
- left))))
- (else left))))
-
- (define (cgol:parse bp)
- (do ((left (cgol:nudcall)
- (cgol:ledcall left)))
- ((or (>= bp 200) ;to avoid unneccesary lookahead
- (>= bp (if cgol:pob (or (cgol:sxop-lbp cgol:pob) 0) 0)))
- left)))
-
- (define (cgol:at-sep?)
- (and cgol:pob (eqv? (cgol:sxop-rbp cgol:pob) -1)))
-
- (define (cgol:at-delim?)
- (or (eof-object? cgol:token)
- (and cgol:pob (eqv? (cgol:sxop-rbp cgol:pob) -2))))
-
- (define (cgol:parse-list sep bp)
- (let ((f (cgol:parse bp)))
- (cons f (cond ((equal? sep cgol:token)
- (cgol:advance)
- (cond
- ((equal? sep cgol:token) (cgol:warn 'expression-missing)
- (cgol:advance)
- (cons '? (cgol:parse-list sep bp)))
- ((cgol:at-delim?)
- (cgol:warn 'expression-missing)
- '(?))
- (else (cgol:parse-list sep bp))))
- (sep '())
- ((cgol:at-delim?) '())
- (else (cgol:parse-list sep bp))))))
-
- (define cgol:arg-separator #f)
- (define cgol:arg-lbp #f)
- (define (cgol:parse-delimited delim)
- (cond ((cgol:at-sep?)
- (cgol:warn 'expression-missing)
- (cgol:advance)
- (cons '? (cgol:parse-delimited delim)))
- ((cgol:at-delim?)
- (if (eqv? delim cgol:token) #t
- (cgol:warn 'mismatched-delimiter))
- (cgol:advance)
- '())
- (else
- (let ((ans (cgol:parse-list cgol:arg-separator cgol:arg-lbp)))
- (cond ((eqv? delim cgol:token))
- ((cgol:at-delim?)
- (cgol:warn 'mismatched-delimiter))
- (else
- (cgol:warn 'delimiter-expected--ignoring-rest)
- (do () ((cgol:at-delim?)) (cgol:parse cgol:arg-lbp))))
- (cgol:advance)
- ans))))
-
- (define (cgol:top-parse delim)
- (let ((tmp (cgol:sxop-get *syn-defs* cgol:arg-separator)))
- (if tmp (set! cgol:arg-lbp (cgol:sxop-lbp tmp))))
- (cgol:advance) ;to get first token
- (cond ((eof-object? cgol:token) (let ((eof cgol:token)) eof))
- ((equal? cgol:token delim) #f)
- ((cgol:at-sep?) (cgol:warn 'extra-separator) #f)
- ((cgol:at-delim?) (cgol:warn 'extra-delimiter) #f)
- (else
- (let ((ans (cgol:parse 0)))
- (cond ((eof-object? cgol:token))
- ((equal? delim cgol:token))
- (else
- (cgol:warn 'delimiter-expected--ignoring-rest)
- (do () ((or (equal? delim cgol:token)
- (eof-object? cgol:token)))
- (cgol:advance))))
- ans))))
-
- (define (call-or-list1 proc arg)
- (if proc (if (procedure? proc) (proc arg) (list proc arg))
- arg))
- (define (call-or-list2 proc arg1 arg2)
- (if proc (if (procedure? proc) (proc arg1 arg2) (list proc arg1 arg2))
- (list arg1 arg2)))
- (define (apply-or-cons proc args)
- (if proc (if (procedure? proc) (apply proc args) (cons proc args))
- args))
-
- ;;;next level of abstraction
-
- (define (cgol:parse-matchfix pob)
- (define name (cgol:sxop-name pob))
- (cgol:advance)
- (cond
- (name
- (apply-or-cons name (cgol:parse-delimited (cgol:sxop-match pob))))
- ((cgol:at-sep?)
- (cgol:warn 'extra-separator)
- (cgol:parse-matchfix pob))
- ((cgol:at-delim?) (cgol:warn 'expression-missing) (cgol:advance) '?)
- (else ;just parenthesized expression
- (let ((ans (cgol:parse cgol:arg-lbp)))
- (do () ((not (cgol:at-sep?)))
- (cgol:warn 'extra-separator) (cgol:advance))
- (do ((left ans (cgol:ledcall left))) ;restart parse
- ((>= cgol:arg-lbp (if cgol:pob (or (cgol:sxop-lbp cgol:pob) 0) 0))
- (set! ans left)))
- (cond ((equal? (cgol:sxop-match pob) cgol:token) (cgol:advance) ans)
- ((cgol:at-delim?) (cgol:warn 'mismatched-delimiter)
- (cgol:advance) ans)
- (else (cgol:warn 'delimiter-expected--ignoring-rest)
- (do () ((cgol:at-delim?)) (cgol:parse cgol:arg-lbp))
- (cgol:advance)
- ans))))))
- (define (cgol:parse-rest pob)
- (cgol:advance) ;past this token
- (cons (cgol:sxop-name pob)
- (cond ((cgol:at-delim?) '())
- (else
- (cond ((cgol:at-sep?)
- (cgol:warn 'extra-separator)
- (cgol:advance)))
- (cgol:parse-list #f (cgol:sxop-rbp pob))))))
- (define (cgol:parse-inmatchfix pob left)
- (define lame (cgol:sxop-lame pob))
- (cgol:advance) ;past this token
- (apply-or-cons
- lame (cons left (cgol:parse-delimited (cgol:sxop-match pob)))))
- (define (cgol:parse-prefix pob)
- (define name (cgol:sxop-name pob))
- (cgol:advance) ;past this token
- (cond ((cgol:at-delim?) (or (and (not (procedure? name)) name)
- (cgol:sxop-lame pob)))
- (else
- (call-or-list1 name (cgol:parse (cgol:sxop-rbp pob))))))
- (define (cgol:parse-nofix pob)
- (define name (cgol:sxop-name pob))
- (cgol:advance) ;past this token
- (apply-or-cons name '()))
- (define (cgol:parse-precomment pob)
- (define name (cgol:sxop-name pob))
- (name)
- (cgol:advance) ;past this token
- (cgol:parse (cgol:sxop-rbp pob)))
- (define (cgol:parse-postcomment pob left)
- (define lame (cgol:sxop-lame pob))
- (lame)
- (cgol:advance) ;past this token
- left)
- (define (cgol:parse-postfix pob left)
- (define lame (cgol:sxop-lame pob))
- (cgol:advance) ;past this token
- (call-or-list1 lame left))
- (define (cgol:parse-infix pob left)
- (define lame (cgol:sxop-lame pob))
- (cgol:advance)
- (cond ((cgol:at-delim?)
- (cgol:warn 'expression-missing)
- (call-or-list2 lame left '?))
- (else
- (call-or-list2 lame left (cgol:parse (cgol:sxop-rbp pob))))))
- (define (cgol:parse-nary pob left)
- (define self cgol:token)
- (define lame (cgol:sxop-lame pob))
- (cgol:advance)
- (cond ((cgol:at-delim?)
- (cgol:warn 'expression-missing)
- (call-or-list2 lame left '?))
- (else
- (apply-or-cons
- lame (cons left (cgol:parse-list self (cgol:sxop-rbp pob)))))))
-